import os
import pytesseract
from PIL import Image


# Step 1: OCR - Extract text from the image
def extract_text_from_image(image_path):
    """Run OCR on the image at *image_path* and return the recognised text.

    Args:
        image_path: Path of the image file to open with Pillow.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.

    Raises:
        Whatever ``Image.open`` or ``pytesseract.image_to_string`` raise,
        e.g. for a missing or unreadable file; nothing is caught here.
    """
    # Image.open keeps the underlying file open until the image is closed,
    # so use it as a context manager to release the handle once OCR is done
    # instead of leaking one handle per processed image.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)


# Step 2: Text Analysis - Identify patterns or keywords
def is_invoice(text):
    """Return True if *text* contains any invoice-related keyword.

    The comparison is case-insensitive and substring-based, so a keyword
    also matches when it appears inside a longer word.
    """
    keywords = ("Invoice", "Total", "Amount", "Date")  # Add more keywords as needed
    lowered_text = text.lower()
    return any(word.lower() in lowered_text for word in keywords)


# Step 3: Process all files in a directory
def process_directory(directory):
    """OCR every supported image directly inside *directory* and print the text.

    For each matching file the function prints the filename followed by
    ``--->``, then the extracted text, then a ``<--`` terminator line.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        None. Results are written to standard output only.
    """
    image_extensions = (".jpg", ".png", ".webp")  # Adjust file extensions as needed

    # os.listdir returns entries in arbitrary order; sort them so the output
    # is reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        # Compare against the lower-cased name so files such as "scan.JPG"
        # or "page.PNG" are not silently skipped.
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(directory, filename)
        text = extract_text_from_image(image_path)
        print(f"{filename}: --->")
        print(text)
        print("<--")
        # NOTE: invoice classification via is_invoice(text) is currently
        # disabled; only the raw OCR text is printed.


# Step 4: Example usage
# NOTE: these statements are at module level, so the directory is processed
# as soon as this file is executed or imported.
directory_path = "img"  # relative path; resolved against the current working directory
process_directory(directory_path)
